# -*- coding: utf-8 -*-
from lxml import etree
import urllib.parse
from selenium import webdriver
import time
from scrapy.http import HtmlResponse


class Baidus(object):
    """Collect Baidu "related search" keywords for a seed keyword.

    The result page of the seed keyword is loaded first; then one result
    page is loaded for each related keyword found on it (a single level of
    expansion). Every related keyword seen along the way is accumulated in
    ``outre`` and finally appended to a text file by ``start``.
    """

    def __init__(self):
        # Number of result pages requested so far.
        self.pages = 0
        # Presumably an upper bound on pages to fetch (TODO confirm); no
        # method of this class reads it.
        self.end_pages = 20
        # Related keywords in order of discovery; duplicates are kept.
        self.outre = []

    def getHtml(self, keywords):
        """Load the Baidu result page for *keywords* and extract related searches.

        Args:
            keywords: Search phrase; it is URL-quoted before being requested.

        Returns:
            A list with the text of every link matched inside the
            ``div#rs`` table of the rendered page (empty if none match).
        """
        url = "https://www.baidu.com/s?wd=" + urllib.parse.quote(keywords)

        # Headless browser used to render the page.
        # NOTE(review): PhantomJS support is deprecated in newer Selenium
        # releases -- confirm against the pinned Selenium version.
        driver = webdriver.PhantomJS()
        try:
            driver.get(url)
            time.sleep(1)
            # Run JS to mimic a user scrolling the page down.
            driver.execute_script("var q=document.documentElement.scrollTop=10000")
            body = driver.page_source
        finally:
            # Always terminate the browser process; without this every call
            # leaves one PhantomJS process running.
            driver.quit()

        html = etree.HTML(body)
        if html is None:
            # Defensive: nothing to query if the parser produced no tree.
            related = []
        else:
            # Text of the links in the "related searches" block.
            related = html.xpath("//div[@id='rs']/table/tbody/tr/th/a/text()")
        print("获取关键【%s】词页面" % keywords, self.pages)
        return related

    def _collect(self, keyword):
        """Count one page, fetch its related keywords, store and return them."""
        self.pages = self.pages + 1
        related = self.getHtml(keyword)
        self.outre.extend(related)
        return related

    def getPages(self, a):
        """Fetch the child page for keyword *a* and record its related keywords."""
        self._collect(a)

    def getIndex(self, keywords):
        """Fetch the entry page for *keywords*, then one child page per related keyword."""
        for related_keyword in self._collect(keywords):
            self.getPages(related_keyword)

    def start(self, arg):
        """Crawl related keywords for *arg* and append them to a text file.

        The file is named ``<arg>【百度相关关键词】.txt`` in the current working
        directory. Nothing is written when no keyword was collected.

        Args:
            arg: Seed keyword to search for.
        """
        filename = arg + "【百度相关关键词】.txt"

        self.getIndex(arg)
        if not self.outre:
            return

        # Explicit UTF-8 so the write does not depend on the locale codec;
        # the trailing newline keeps a later append from being glued onto
        # the last keyword of this run.
        with open(filename, 'a', encoding='utf-8') as file_object:
            file_object.write('\n'.join(self.outre) + '\n')

if __name__ == "__main__":
    # Script entry point: crawl related keywords for a fixed seed keyword.
    crawler = Baidus()
    crawler.start("静音窗")
